* ------------------------------------------------------------------------------
* Calculate indices of assortativeness and test statistics to compare 
* assortativeness across cohorts
* ------------------------------------------------------------------------------


* Routines
* ------------------------------------------------------------------------------

* Calculate indices of assortativeness for given 2x2 matrix:
* odds ratio, Chi-square, Minimum distance, Likelihood ratio, normalised trace

	cap program drop calcIdx
	program define calcIdx
	
	* Computes assortativeness indices, observation by observation, from the
	* four cells of a 2x2 sorting matrix held in variables a, b, c, d
	* (a and d are the diagonal cells, b and c the off-diagonal cells).
	*
	* Requires: numeric variables a, b, c, d in the data in memory.
	* Creates : odds, Xsq, MD, L1, L2, wL, T (default storage type).
	*           The program fails if any of these already exists, so callers
	*           must rename or drop them between successive calls.
	* Scratch quantities are held in temporary variables, so pre-existing
	* variables in the caller's data (e.g. one named x or w1) are neither
	* required to be absent nor modified.
	
	{
		
		tempvar minbc w1
		
		* Odds ratio
		gen odds = (a*d) / (b*c)
		
		* Xsq
		gen Xsq = (a*d - b*c)^2 / ((a+b)*(c+d)*(a+c)*(b+d))
							
		* minimum distance
		* `minbc' equals b when b<c and c otherwise, i.e. min(b,c)
		gen `minbc' = c + (b<c)*(b-c)
		gen MD = (a*d - b*c) / ((`minbc'+d)*(`minbc'+a)) 
							
		* likelihood ratios: diagonal cell relative to the product of its
		* row and column totals, scaled by the table total
		gen L1 = a*(a+b+c+d)/((a+b)*(a+c))
		gen L2 = d*(a+b+c+d)/((d+b)*(d+c))
		
		* weighted likelihood ratio
		* weight on L1 is the share of (a+b)*(a+c) in the sum of both
		* row-total x column-total products
		gen `w1' = (a+b)*(a+c) / ((a+b)*(a+c) + (d+b)*(d+c))
		gen wL = `w1'*L1 + (1-`w1')*L2
		
		* normalised trace: share of the table total on the diagonal
		gen T = (a+d)/(a+b+c+d)

	}
	
	end
		
	

* Calculate various indices of assortativeness, their changes across cohorts,
* and t-statistics for the figures in Table 3 and Table 5 (Online Appendix)

	cap program drop calcSortTestStat
	program define calcSortTestStat
	
	* Computes local and global sorting indices for two cohorts, their
	* across-cohort differences, and t-statistics for those differences.
	*
	* Requires (data in memory):
	*   rep        replication id; rep==0 is treated as the original sample and
	*              rep>0 as bootstrap replications
	*   coh        cohort, coded 1 and 2 (differences are cohort 2 minus cohort 1)
	*   Sm<i>w<j>  cells of the sorting matrix for i,j = 1..$Ns
	* Requires (globals): Ns, the number of education groups (integer >= 3).
	* Calls: calcIdx.
	*
	* Result: the data in memory are replaced by a long dataset with one row per
	*   rep x index holding dI (difference in the index across cohorts) and
	*   t_dI (its t-statistic).
	* Side effects: sets globals indices and se_dI1 ... se_dI30.
	*
	* The three highest education groups are addressed relative to $Ns
	* (Ns, Ns-1, Ns-2); for Ns==5 every variable name and the content of
	* $indices are the same as with hard-coded groups 5, 4 and 3.
	
	{
		
		** Education-group positions
		** ---------------------------------------------------------------------
		
			capture confirm integer number ${Ns}
			if _rc {
				display as error "global Ns must hold the number of education groups (an integer >= 3)"
				exit 198
			}
			if ${Ns} < 3 {
				display as error "global Ns must be at least 3"
				exit 198
			}
			
			local hi  = ${Ns}		// highest group (PG)
			local mid = ${Ns}-1		// second-highest group (C)
			local lo  = ${Ns}-2		// third-highest group (SC)
			
			* cells in which both spouses belong to one of the two top groups
			local topBlock "Sm`hi'w`hi' + Sm`hi'w`mid' + Sm`mid'w`hi' + Sm`mid'w`mid'"
		
		** Prepare variables
		** ---------------------------------------------------------------------
				
			foreach cell in a b c d {
				qui gen `cell' = .
			}
			
		** Local indices: PG vs C
		** ---------------------------------------------------------------------
		
			* 2x2 sorting matrix
				qui replace a = Sm`hi'w`hi'
				qui replace b = Sm`hi'w`mid'
				qui replace c = Sm`mid'w`hi'
				qui replace d = Sm`mid'w`mid'
			
			* indices
				qui calcIdx
				foreach v in odds Xsq MD L1 L2 wL T { 
					qui rename `v' `v'_`mid'`hi'
				}				

		** Local indices: C vs SC
		** ---------------------------------------------------------------------
		
			* 2x2 sorting matrix
				qui replace a = Sm`mid'w`mid'
				qui replace b = Sm`mid'w`lo'
				qui replace c = Sm`lo'w`mid'
				qui replace d = Sm`lo'w`lo'
			
			* indices
				qui calcIdx
				foreach v in odds Xsq MD L1 L2 wL T { 
					qui rename `v' `v'_`lo'`mid'
				}
				
		** Local indices: PG+C vs SC
		** ---------------------------------------------------------------------
		
			* 2x2 sorting matrix
				qui replace a = `topBlock'
				qui replace b = Sm`hi'w`lo' + Sm`mid'w`lo'
				qui replace c = Sm`lo'w`hi' + Sm`lo'w`mid'
				qui replace d = Sm`lo'w`lo'
			
			* indices
				qui calcIdx
				foreach v in odds Xsq MD L1 L2 wL T { 
					qui rename `v' `v'_`lo'`mid'`hi'
				}	

			
		** Indices for aggregate sorting matrices
		** ---------------------------------------------------------------------
					
			* sum of all elements in sorting matrix, conditional on marriage
			
				qui gen sumS = 0
				forvalues s1 = 1/$Ns {
				forvalues s2 = 1/$Ns {
					qui replace sumS = sumS + Sm`s1'w`s2'
				}
				}
			
			* calculate indices: marrying your equal vs marrying someone else
			*                    PG+C vs others
			
				* off-diagonal cells: one spouse in the two top groups, the
				* other in groups 1..`lo'. The leading 0 only seeds the sum.
				local bExpr "0"
				local cExpr "0"
				foreach top in `hi' `mid' {
					forvalues low = 1/`lo' {
						local bExpr "`bExpr' + Sm`top'w`low'"
						local cExpr "`cExpr' + Sm`low'w`top'"
					}
				}
			
				* 2x2 sorting matrix
				qui replace a = `topBlock'
				qui replace b = `bExpr'
				qui replace c = `cExpr'
				qui replace d = sumS-a-b-c

				* indices
				qui calcIdx
				foreach v in odds Xsq MD L1 L2 wL T { 
					qui rename `v' `v'_`mid'p
				}
				

		** Global indices
		** ---------------------------------------------------------------------
				
			* quocient: sum over groups s of (column-s total) x (row-s total)
			* trace   : sum of the diagonal cells
			qui gen quocient = 0
			qui gen trace = 0
		
			forvalues s=1/$Ns {
				local colSum "Sm1w`s'"
				local rowSum "Sm`s'w1"
				forvalues k=2/$Ns {
					local colSum "`colSum' + Sm`k'w`s'"
					local rowSum "`rowSum' + Sm`s'w`k'"
				}
				qui replace quocient = quocient + (`colSum') * (`rowSum')
				qui replace trace = trace + Sm`s'w`s'
			}
			
			qui gen globalT  = trace / sumS
			qui gen globalwL = sumS * trace / quocient
			

		** Clean & reshape data
		** ---------------------------------------------------------------------
		
			* the _c suffix plus the cohort code becomes _c1 / _c2 after
			* the reshape
			qui keep rep coh odds* Xsq* MD* L1* L2* wL* T* global* 
			foreach I in odds Xsq MD L1 L2 wL T globalT globalwL {   
				rename `I'* `I'*_c
			}
			
			reshape wide odds* Xsq* MD* L1* L2* wL* T* globalT* globalwL*, i(rep) j(coh)
			sort rep

			
		** Construct across cohort differences in indices and t-stats
		** ---------------------------------------------------------------------
		
			* list of indices; its order must match the value labels
			* defined for variable index below
			local idxList
			foreach sfx in `lo'`mid'`hi' `mid'`hi' `lo'`mid' `mid'p {
				foreach stat in odds L1 L2 wL T Xsq MD {
					local idxList `idxList' `stat'_`sfx'
				}
			}
			global indices "`idxList' globalwL globalT"
					
			* calculate differences and t-stats
			local ix = 0
			foreach I of global indices {
		
				local ix = `ix'+1

					* differences in indices
				qui gen dI`ix' = `I'_c2 - `I'_c1
					
					* calculate & store SE of difference
					* (standard deviation across replications with rep>0)
				qui sum dI`ix' if rep>0
				global se_dI`ix' = r(sd)
					
					* calculate t-stat for original sample
				qui gen t_dI`ix' = abs(dI`ix')/${se_dI`ix'} if rep==0

					* calculate t-stat for BS repetitions, centred on the
					* first observation; after -sort rep- that is the
					* original sample provided rep==0 is the smallest rep
				qui replace t_dI`ix' = abs(dI`ix'-dI`ix'[1]) / ${se_dI`ix'} if rep>0

			}
			
			
		** Clean and reshape data
		** ----------------------

			qui keep rep dI* t_dI*
			qui reshape long dI t_dI, i(rep) j(index)
			label define index 1  "PG+C vs SC: odds ratio"       ///
							   2  "PG+C vs SC: L1"               ///
							   3  "PG+C vs SC: L2"               ///
							   4  "PG+C vs SC: weighted L"       /// 
							   5  "PG+C vs SC: Trace"            /// 
							   6  "PG+C vs SC: Xsq"              ///
							   7  "PG+C vs SC: Min Distance"     /// 
							   8  "PG vs C: odds ratio"          ///
							   9  "PG vs C: L1"                  ///
							   10 "PG vs C: L2"                  ///
							   11 "PG vs C: weighted L"          ///
							   12 "PG vs C: Trace"               ///
							   13 "PG vs C: Xsq"                 ///
							   14 "PG vs C: Min Distance"        ///
							   15 "C vs SC: odds ratio"          ///
							   16 "C vs SC: L1"                  ///
							   17 "C vs SC: L2"                  ///
							   18 "C vs SC: weighted L"          ///
							   19 "C vs SC: Trace"               ///
							   20 "C vs SC: Xsq"                 ///
							   21 "C vs SC: Min Distance"        ///
							   22 "PG+C vs others: odds ratio"   ///
							   23 "PG+C vs others: L1"           ///
							   24 "PG+C vs others: L2"           ///
							   25 "PG+C vs others: weighted L"   ///
							   26 "PG+C vs others: Trace"        ///
							   27 "PG+C vs others: Xsq"          ///
							   28 "PG+C vs others: Min Distance" ///
							   29 "Global weighted L"            ///
							   30 "Global trace", modify
			label val index index
			
			label var dI    "difference in indices across cohorts"
			label var t_dI  "t-stat for diff across cohorts"
			label var index "index"

	}
	
	end

	
* Use routines to calculate sorting indices and compare them across cohorts
* ------------------------------------------------------------------------------

* For each earlier cohort (codes 1-4), compare it with the latest cohort
* (code 5): load the sorting matrices from bsSmx.dta, keep the two cohorts,
* recode them to 1 (earlier) and 2 (later) as calcSortTestStat expects, and
* save the resulting test statistics to bsTests_<later>vs<earlier>.dta.
* Requires global logfolder to point to the folder holding bsSmx.dta.
{
	global Ns = 5	// number of education groups
	
	* cohort code -> label used in the output file name
	local l1 "30"
	local l2 "40"
	local l3 "50"
	local l4 "60"
	local l5 "70"

	local c2 = 5	// 70s cohort
	forvalues c1=1/4 {
		* forward slash works on every platform and matches the save path
		* below; -clear- is the documented option for replacing data in memory
		qui use "$logfolder/bsSmx.dta", clear
		* remove value label coh before the cohort codes are recoded
		* (presumably because it would no longer match the new codes)
		label drop coh
		qui keep if inlist(coh,`c1',`c2')
		qui recode coh (`c1'=1) (`c2'=2)	
		qui calcSortTestStat
		qui save "$logfolder/bsTests_`l`c2''vs`l`c1''.dta", replace
	}	
}

